setwd('~/Dropbox/Apps/Overleaf/Purge')

###############################################
###############################################
###############################################

# A1 Balance Tests ----

source('code/pre_analysis.R')

v <- c(cov_cnty, c('dr_1957', 'cps_native_p', 'cps_ht_north', 'log(zf_num_kill+1)', 'log(anticor)'))
m <- feols(ln_purge_pc ~ sw(.[, v]), cluster = ~province, data = df, subset = ~year == 1960)

out <- lapply(m, function(f) tidy(f)) %>% bind_rows() %>% filter(term != "(Intercept)") %>% data.table()
out[, term := plyr::mapvalues(v, from = labels(lab), to = lab, warn_missing = F)]
out[, n := lapply(m, function(x) x$nobs) %>% unlist()]
col <- c('Covariates', 'Coefficient', 'Std. Error', 'p-value', 'N')

out %>%
  select(term, estimate, std.error, p.value, n) %>%
  kbl(digits = 3, format = 'latex', booktabs = T, col.names = col) %>%
  add_header_above(c(" " = 1, "Outcome: log purge victim density" = 4)) %>%
  column_spec(4, bold = out$p.value < 0.05) %>% 
  pack_rows("Baseline", 1, length(cov_cnty), italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Additional", length(cov_cnty)+1, nrow(out), italic = T, latex_gap_space = '4mm') %>%
  save_kable('table/appendix/balance.tex')

###############################################
###############################################
###############################################

# A2 Missing on Observable ----

source('code/pre_analysis.R')

df[, miss := ifelse(is.na(purge), 1, 0)]
df[, dr58 := log(dr[year == 1958]), by = countyid]
df[, dr59 := log(dr[year == 1959]), by = countyid]
df[, dr60 := log(dr[year == 1960]), by = countyid]
df[, dr61 := log(dr[year == 1961]), by = countyid]

est <- feols(miss ~ sw(dr58, dr59, dr60, dr61, .[, cov_cnty]), cluster = ~province, data = df %>% filter(year == 1957))
out <- lapply(est, tidy) %>% bind_rows() %>% filter(term != "(Intercept)") %>% mutate(n = lapply(est, function(x) x$nobs) %>% unlist())

lab <- c('dr58' = 'Log mortality rate, 1958', 'dr59' = 'Log mortality rate, 1959', 'dr60' = 'Log mortality rate, 1960', 'dr61' = 'Log mortality rate, 1961', lab)
col <- c('Variable', 'Coefficient', 'Std. Error', 'p-value', 'N')

out %>%
  mutate(term = plyr::mapvalues(term, from = labels(lab), to = lab, warn_missing = F)) %>%
  select(term, estimate, std.error, p.value, n) %>%
  kbl(col.names = col, digits = 3, format = 'latex', booktabs = T) %>%
  column_spec(4, bold = out$p.value < 0.05) %>% 
  pack_rows("Famine mortality", 1, 4, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Baseline covariates", 5, nrow(out), italic = T, latex_gap_space = '4mm') %>%
  save_kable("table/appendix/corr_purge_NA.tex")

###############################################
###############################################
###############################################

# A4 Alternative Specifications ----

source('code/pre_analysis.R')

v <- names(df)[loc]

m <- feols(over_exc ~ ln_purge*glf + 
             csw(1, (.[v])*glf, (popu57 + weather + grain_pc57)*glf, (school + ln_clan)*glf, (num_cc + ln_ccp + ln_kmt)*glf) | 
             countyid + year, cluster = ~countyid, data = df)

n <- lapply(m, function(X) update(X, ln_dr ~ .))

etable(m, group = list('PSC covariates $\\times$ GLF' = 'psc_'), dict = lab, replace = T, file = 'table/appendix/alter_specification_1.tex')
etable(n, group = list('PSC covariates $\\times$ GLF' = 'psc_'), dict = lab, replace = T, file = 'table/appendix/alter_specification_2.tex')


###############################################
###############################################
###############################################

# A5 Interpolated Purge ----

source('code/pre_analysis.R')

f <- 'sw(over_exc, ln_dr) ~ sw(ln_purge*glf, log(purge_intp+1)*glf) + (.[cov])*glf | countyid + year'
m <- feols(formula(f), cluster = ~countyid, data = df)

etable(m, group = list('Covariates $\\times$ GLF' = '!purge'), dict = lab, replace = T, file = 'table/appendix/robust_interpolation.tex')

###############################################
###############################################
###############################################

# A6 Sensitivity Analysis (Measurement Errors) ----

source('code/pre_analysis.R')

df[, dr_exc := ifelse(mean(dr[year %in% 1954:1957], na.rm = T) < mean(dr[year %in% 1958:1961], na.rm = T), 1, 0), by = countyid]

sens_est <- function(ETA){
  df[, purge_true := ifelse(dr_exc == 1, purge*ETA, purge)]
  f <- 'ln_dr ~ log(purge_true+1)*glf + (.[cov])*glf | countyid + year'
  m <- feols(formula(f), cluster = ~ countyid, data = df)
  out <- tidy(m, conf.int = T) %>% filter(grepl('purge_true', term)) %>% return()
}

eta <- (11:30)/20
out <- lapply(eta, function (x) sens_est(ETA = x)) %>% bind_rows()

plot <- out %>%
  ggplot(aes(x = eta, y = estimate, ymin = conf.low, ymax = conf.high)) +
  annotate('text', x = 0.75, y = 0.15, label = 'over-reported') +
  annotate('text', x = 1.25, y = 0.15, label = 'under-reported') +
  geom_ribbon(color = 'gray85', alpha = .1, linetype = 0) + geom_line(linewidth = .7) + 
  geom_hline(yintercept = 0) + geom_vline(xintercept = 1, color = 'blue', linetype = 1) + mytheme() +
  labs(x = 'Purge victims: actual number / reported number', 
       y = 'Actual effect of purges \n on famine mortality')

ggsave(plot, width = 5.5, height = 4, filename = 'figure/appendix/sensitivity_measurement_error.pdf')

###############################################
###############################################
###############################################

# A7 Census Cohort Size ----

source('code/pre_analysis.R')

df[, mean(cohort, na.rm = T), by = glf]

f <- 'log(cohort) ~ sw(ln_purge*glf, ln_purge_pc*glf) + (.[cov])*glf | countyid + year'
m <- feols(formula(f), cluster = ~ countyid, data = df)

etable(m, group = list('Covariates $\\times$ GLF' = '!victim'), dict = lab, replace = T, file = 'table/appendix/robust_cohort.tex')

###############################################
###############################################
###############################################

# A8 Simulation of Omitted Variables ----

source('code/pre_analysis.R')

simu <- function(purge_share, rltv_effect){
  set.seed(123)
  misalign <- runif(1000, 0, 100)
  prob_p <- rnorm(1000, purge_share, 0.1)
  prob_p[prob_p < 0] <- 0
  purge <- misalign*prob_p
  error <- rnorm(1000, 0, 20)
  dr <- (1/rltv_effect)*(100-misalign) + purge + error
  dt <- data.frame(dr, misalign, purge)
  est <- feols(dr ~ purge, data = dt)
  su <- tidy(est, conf.int = T) %>% filter(term == 'purge')
  return(su)
}

share <- (1:50)/100
out <- bind_rows(lapply(share, function(x) simu(x, 5)) %>% bind_rows() %>% mutate(purge_share = share, rltv_eff = 5),
                 lapply(share, function(x) simu(x, 10)) %>% bind_rows() %>% mutate(purge_share = share, rltv_eff = 10),
                 lapply(share, function(x) simu(x, 20)) %>% bind_rows() %>% mutate(purge_share = share, rltv_eff = 20))

lab <- c(`5` = 'beta^kappa/beta^pi==5', `10` = 'beta^kappa/beta^pi==10', `20` = 'beta^kappa/beta^pi==20')

plot <- out %>% 
  ggplot(aes(x = purge_share, y = estimate, ymin = conf.low, ymax = conf.high)) + 
  facet_wrap(rltv_eff ~ ., labeller = as_labeller(lab, default = label_parsed)) +
  geom_hline(yintercept = 1, linetype = 1, linewidth = .4, color = 'blue') + 
  geom_ribbon(color = 'gray85', alpha = .1, linetype = 0) + geom_line(linewidth = .7) + geom_hline(yintercept = 0) + 
  mytheme() + labs(x = 'Share of misaligned bureaucrats purged', y = 'Estimated effect of purges')

ggsave(plot, width = 8, height = 3.5, filename = 'figure/appendix/simulation_loyalty.pdf')

###############################################
###############################################
###############################################

# A9 Sub-sample Analysis ----

source('code/pre_analysis.R')

sub_df <- df[countyid %in% df$countyid[df$year >= 1958 & df$turnover == 1]]
f <- 'sw(over_exc, ln_dr) ~ sw(ln_purge*glf, ln_purge_pc*glf) + (.[cov])*glf | countyid + year'
m <- feols(formula(f), cluster = ~ countyid, data = sub_df)

etable(m, group = list('Covariates $\\times$ GLF' = '!victim'), dict = lab, replace = T, file = 'table/appendix/subsample.tex')

###############################################
###############################################
###############################################

# A10 Sensitivity Analysis (Omitted Variables) ----

source('code/pre_analysis.R')

df[, exc_dr := mean(dr[year %in% 1958:1961], na.rm = T) - mean(dr[year %in% 1954:1957], na.rm = T), by = countyid]

f <- paste0('exc_dr ~ ', paste(c('ln_purge', cov), collapse = ' + ')) %>% as.formula()
m <- lm(f, data = df %>% filter(year == 1957))
sens <- sensemakr::sensemakr(m, treatment = 'ln_purge', benchmark_covariates = 'ln_clan', kd = 6)

pdf('figure/appendix/sensitive_clan.pdf', 5, 5)
par(oma = rep(0.75, 4))
plot(sens, lim = 0.2, lim.y = 0.5, col.thr.line = 'tomato4', nlevels = 6)
dev.off()

###############################################
###############################################
###############################################

# A12 RDD Robustness ----

source('code/pre_analysis.R')

rdd <- function(DV, FZ, P, BW){
  d <- rd
  dv <- d[, ..DV] %>% as.vector() %>% unlist()
  fz <- d[, ..FZ] %>% as.vector() %>% unlist()
  est <- rdrobust(y = dv, x = d$dist, fuzzy = fz, p = P, h = BW, covs = rdfe, cluster = d$grid, weights = 1/d$dist_abs)
  result <- tibble(poly = P, coef = est$coef[1], se = est$se[1], pval = est$pv[1], bw = est$bws[1,1], n = sum(est$N_h)) %>% as.data.frame()
  return(result)
}

bw <- 60+(1:14)*10

l <- c(lapply(bw, function (x) rdd(DV = 'ln_dr', FZ = 'ln_purge', P = 1, BW = x)),
       lapply(bw, function (x) rdd(DV = 'ln_dr', FZ = 'ln_purge', P = 2, BW = x)),
       lapply(bw, function (x) rdd(DV = 'ln_dr', FZ = 'ln_purge', P = 3, BW = x))) %>% bind_rows()

polylab <- c(`1` = 'First degree polynomial', `2` = 'Second degree polynomial', `3` = 'Third degree polynomial')

plot <- l %>%
  ggplot(aes(x = bw, y = coef, ymax = coef + 1.96*se, ymin = coef - 1.96*se)) + 
  facet_wrap(poly ~ ., labeller = as_labeller(polylab)) +
  geom_hline(yintercept = 0) + geom_point(size = 2.2) + geom_errorbar(width = 0) + mytheme() + 
  scale_y_continuous(expand = expansion(mult = c(.5, .5))) +
  labs(x = 'Bandwidth (kilometers)', y = 'RDD estimates')

ggsave(plot, width = 8, height = 3.3, filename = 'figure/appendix/RD_robust.pdf')

###############################################
###############################################
###############################################

# A13 RDD Falsification ----

source('code/pre_analysis.R')

rdd <- function(DV, FZ, P, BW){
  d <- rd
  dv <- d[, ..DV] %>% as.vector() %>% unlist()
  fz <- d[, ..FZ] %>% as.vector() %>% unlist()
  est <- rdrobust(y = dv, x = d$dist, fuzzy = fz, p = P, h = BW, covs = rdfe, cluster = d$grid, weights = 1/d$dist_abs)
  result <- tibble(poly = P, coef = est$coef[1], se = est$se[1], pval = est$pv[1], bw = est$bws[1,1], n = sum(est$N_h)) %>% as.data.frame()
  return(result)
}

out <- lapply(rd_var, function (x) rdd(DV = 'ln_dr', FZ = x, P = 1, BW = NULL)) %>% bind_rows()
out <- out %>% mutate(var = plyr::mapvalues(rd_var, from = labels(lab), to = lab, warn_missing = F))
col <- c('Variable', 'Coefficient', 'Std. Error', 'p-value', 'Bandwidth', 'N')

out %>% 
  mutate(bw = round(bw)) %>% relocate(var) %>% select(-poly) %>%
  kbl(digits = 2, format = 'latex', booktabs = T, col.names = col) %>%
  pack_rows("Political characteristics", 1, 3, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Socioeconomic characteristics", 4, 9, italic = T, latex_gap_space = '4mm') %>%
  pack_rows("Geographical characteristics", 10, 13, italic = T, latex_gap_space = '4mm') %>%
  save_kable('table/appendix/RD_placebo.tex')

###############################################
###############################################
###############################################



